From 6c18e9c491959ac0674ebe36b09f9ddc3f2c9bce Mon Sep 17 00:00:00 2001
From: Birger Koblitz <git@birger-koblitz.de>
Date: Fri, 31 Dec 2021 11:56:49 +0100
Subject: [PATCH] realtek: Add VPE support for the IRQ driver

In order to support VSMP, enable support for both VPEs
of the RTL839X and RTL930X SoCs in the irq-realtek-rtl
driver. Add support for IRQ affinity setting.

Submitted-by: Birger Koblitz <git@birger-koblitz.de>
---
 drivers/irqchip/irq-realtek-rtl.c             | 152 +++++++++++++++---
 1 file changed, 73 insertions(+), 76 deletions(-)

--- a/drivers/irqchip/irq-realtek-rtl.c
+++ b/drivers/irqchip/irq-realtek-rtl.c
@@ -21,21 +21,63 @@
 #define RTL_ICTL_IRR2		0x10
 #define RTL_ICTL_IRR3		0x14
 
-#define REG(x)		(realtek_ictl_base + x)
+#define RTL_ICTL_NUM_INPUTS	32
+#define RTL_ICTL_NUM_OUTPUTS	15
 
 static DEFINE_RAW_SPINLOCK(irq_lock);
-static void __iomem *realtek_ictl_base;
+
+#define REG(offset, cpu)	(realtek_ictl_base[cpu] + offset)
+
+static void __iomem *realtek_ictl_base[NR_CPUS];
+static cpumask_t realtek_ictl_cpu_configurable;
+
+struct realtek_ictl_output {
+	/* IRQ controller data */
+	struct fwnode_handle *fwnode;
+	/* Output specific data */
+	unsigned int output_index;
+	struct irq_domain *domain;
+	u32 child_mask;
+};
+
+/*
+ * IRR0-IRR3 store 4 bits per interrupt, but Realtek uses inverted numbering,
+ * placing IRQ 31 in the first four bits. A routing value of '0' means the
+ * interrupt is left disconnected. Routing values {1..15} connect to output
+ * lines {0..14}.
+ */
+#define IRR_OFFSET(idx)		(4 * (3 - (idx * 4) / 32))
+#define IRR_SHIFT(idx)		((idx * 4) % 32)
+
+static inline u32 read_irr(void __iomem *irr0, int idx)
+{
+	return (readl(irr0 + IRR_OFFSET(idx)) >> IRR_SHIFT(idx)) & 0xf;
+}
+
+static inline void write_irr(void __iomem *irr0, int idx, u32 value)
+{
+	unsigned int offset = IRR_OFFSET(idx);
+	unsigned int shift = IRR_SHIFT(idx);
+	u32 irr;
+
+	irr = readl(irr0 + offset) & ~(0xf << shift);
+	irr |= (value & 0xf) << shift;
+	writel(irr, irr0 + offset);
+}
 
 static void realtek_ictl_unmask_irq(struct irq_data *i)
 {
 	unsigned long flags;
 	u32 value;
+	int cpu;
 
 	raw_spin_lock_irqsave(&irq_lock, flags);
 
-	value = readl(REG(RTL_ICTL_GIMR));
-	value |= BIT(i->hwirq);
-	writel(value, REG(RTL_ICTL_GIMR));
+	for_each_cpu(cpu, &realtek_ictl_cpu_configurable) {
+		value = readl(REG(RTL_ICTL_GIMR, cpu));
+		value |= BIT(i->hwirq);
+		writel(value, REG(RTL_ICTL_GIMR, cpu));
+	}
 
 	raw_spin_unlock_irqrestore(&irq_lock, flags);
 }
@@ -44,52 +86,137 @@ static void realtek_ictl_mask_irq(struct
 {
 	unsigned long flags;
 	u32 value;
+	int cpu;
 
 	raw_spin_lock_irqsave(&irq_lock, flags);
 
-	value = readl(REG(RTL_ICTL_GIMR));
-	value &= ~BIT(i->hwirq);
-	writel(value, REG(RTL_ICTL_GIMR));
+	for_each_cpu(cpu, &realtek_ictl_cpu_configurable) {
+		value = readl(REG(RTL_ICTL_GIMR, cpu));
+		value &= ~BIT(i->hwirq);
+		writel(value, REG(RTL_ICTL_GIMR, cpu));
+	}
 
 	raw_spin_unlock_irqrestore(&irq_lock, flags);
 }
 
+static int __maybe_unused realtek_ictl_irq_affinity(struct irq_data *i,
+	const struct cpumask *dest, bool force)
+{
+	struct realtek_ictl_output *output = i->domain->host_data;
+	cpumask_t cpu_configure;
+	cpumask_t cpu_disable;
+	cpumask_t cpu_enable;
+	unsigned long flags;
+	int cpu;
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+
+	cpumask_and(&cpu_configure, cpu_present_mask, &realtek_ictl_cpu_configurable);
+
+	cpumask_and(&cpu_enable, &cpu_configure, dest);
+	cpumask_andnot(&cpu_disable, &cpu_configure, dest);
+
+	for_each_cpu(cpu, &cpu_disable)
+		write_irr(REG(RTL_ICTL_IRR0, cpu), i->hwirq, 0);
+
+	for_each_cpu(cpu, &cpu_enable)
+		write_irr(REG(RTL_ICTL_IRR0, cpu), i->hwirq, output->output_index + 1);
+
+	irq_data_update_effective_affinity(i, &cpu_enable);
+
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+
+	return IRQ_SET_MASK_OK;
+}
+
 static struct irq_chip realtek_ictl_irq = {
 	.name = "realtek-rtl-intc",
 	.irq_mask = realtek_ictl_mask_irq,
 	.irq_unmask = realtek_ictl_unmask_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = realtek_ictl_irq_affinity,
+#endif
 };
 
 static int intc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
 {
+	struct realtek_ictl_output *output = d->host_data;
+	unsigned long flags;
+
 	irq_set_chip_and_handler(irq, &realtek_ictl_irq, handle_level_irq);
 
+	raw_spin_lock_irqsave(&irq_lock, flags);
+
+	output->child_mask |= BIT(hw);
+	write_irr(REG(RTL_ICTL_IRR0, 0), hw, output->output_index + 1);
+
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+
 	return 0;
 }
 
+static int intc_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+	enum irq_domain_bus_token bus_token)
+{
+	struct realtek_ictl_output *output = d->host_data;
+	bool routed_elsewhere;
+	unsigned long flags;
+	u32 routing_old;
+	int cpu;
+
+	if (fwspec->fwnode != output->fwnode)
+		return false;
+
+	/* Original specifiers had only one parameter */
+	if (fwspec->param_count < 2)
+		return true;
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+
+	/*
+	 * Inputs can only be routed to one output, so they shouldn't be
+	 * allowed to end up in multiple domains.
+	 */
+	for_each_cpu(cpu, &realtek_ictl_cpu_configurable) {
+		routing_old = read_irr(REG(RTL_ICTL_IRR0, cpu), fwspec->param[0]);
+		routed_elsewhere = routing_old && fwspec->param[1] != routing_old - 1;
+		if (routed_elsewhere) {
+			pr_warn("soc int %d already routed to output %d\n",
+				fwspec->param[0], routing_old - 1);
+			break;
+		}
+	}
+
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+
+	return !routed_elsewhere && fwspec->param[1] == output->output_index;
+}
+
 static const struct irq_domain_ops irq_domain_ops = {
 	.map = intc_map,
+	.select = intc_select,
 	.xlate = irq_domain_xlate_onecell,
 };
 
 static void realtek_irq_dispatch(struct irq_desc *desc)
 {
+	struct realtek_ictl_output *output = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	struct irq_domain *domain;
+	int cpu = smp_processor_id();
 	unsigned long pending;
 	unsigned int soc_int;
 
 	chained_irq_enter(chip, desc);
-	pending = readl(REG(RTL_ICTL_GIMR)) & readl(REG(RTL_ICTL_GISR));
+	pending = readl(REG(RTL_ICTL_GIMR, cpu)) & readl(REG(RTL_ICTL_GISR, cpu))
+		& output->child_mask;
 
 	if (unlikely(!pending)) {
 		spurious_interrupt();
 		goto out;
 	}
 
-	domain = irq_desc_get_handler_data(desc);
-	for_each_set_bit(soc_int, &pending, 32)
-		generic_handle_domain_irq(domain, soc_int);
+	for_each_set_bit(soc_int, &pending, RTL_ICTL_NUM_INPUTS)
+		generic_handle_domain_irq(output->domain, soc_int);
 
 out:
 	chained_irq_exit(chip, desc);
@@ -102,85 +229,110 @@ out:
  * thus go into 4 IRRs. A routing value of '0' means the interrupt is left
  * disconnected. Routing values {1..15} connect to output lines {0..14}.
  */
-static int __init map_interrupts(struct device_node *node, struct irq_domain *domain)
+static int __init setup_parent_interrupts(struct device_node *node, int *parents,
+	unsigned int num_parents)
 {
-	struct device_node *cpu_ictl;
-	const __be32 *imap;
-	u32 imaplen, soc_int, cpu_int, tmp, regs[4];
-	int ret, i, irr_regs[] = {
-		RTL_ICTL_IRR3,
-		RTL_ICTL_IRR2,
-		RTL_ICTL_IRR1,
-		RTL_ICTL_IRR0,
-	};
-	u8 mips_irqs_set;
+	struct realtek_ictl_output *outputs;
+	struct realtek_ictl_output *output;
+	struct irq_domain *domain;
+	unsigned int p;
 
-	ret = of_property_read_u32(node, "#address-cells", &tmp);
-	if (ret || tmp)
-		return -EINVAL;
+	outputs = kcalloc(num_parents, sizeof(*outputs), GFP_KERNEL);
+	if (!outputs)
+		return -ENOMEM;
 
-	imap = of_get_property(node, "interrupt-map", &imaplen);
-	if (!imap || imaplen % 3)
-		return -EINVAL;
+	for (p = 0; p < num_parents; p++) {
+		output = outputs + p;
 
-	mips_irqs_set = 0;
-	memset(regs, 0, sizeof(regs));
-	for (i = 0; i < imaplen; i += 3 * sizeof(u32)) {
-		soc_int = be32_to_cpup(imap);
-		if (soc_int > 31)
-			return -EINVAL;
-
-		cpu_ictl = of_find_node_by_phandle(be32_to_cpup(imap + 1));
-		if (!cpu_ictl)
-			return -EINVAL;
-		ret = of_property_read_u32(cpu_ictl, "#interrupt-cells", &tmp);
-		of_node_put(cpu_ictl);
-		if (ret || tmp != 1)
-			return -EINVAL;
-
-		cpu_int = be32_to_cpup(imap + 2);
-		if (cpu_int > 7 || cpu_int < 2)
-			return -EINVAL;
-
-		if (!(mips_irqs_set & BIT(cpu_int))) {
-			irq_set_chained_handler_and_data(cpu_int, realtek_irq_dispatch,
-							 domain);
-			mips_irqs_set |= BIT(cpu_int);
-		}
+		domain = irq_domain_add_linear(node, RTL_ICTL_NUM_INPUTS, &irq_domain_ops, output);
+		if (!domain)
+			goto domain_err;
 
-		/* Use routing values (1..6) for CPU interrupts (2..7) */
-		regs[(soc_int * 4) / 32] |= (cpu_int - 1) << (soc_int * 4) % 32;
-		imap += 3;
-	}
+		output->fwnode = of_node_to_fwnode(node);
+		output->output_index = p;
+		output->domain = domain;
 
-	for (i = 0; i < 4; i++)
-		writel(regs[i], REG(irr_regs[i]));
+		irq_set_chained_handler_and_data(parents[p], realtek_irq_dispatch, output);
+	}
 
 	return 0;
+
+domain_err:
+	while (p--) {
+		irq_set_chained_handler_and_data(parents[p], NULL, NULL);
+		irq_domain_remove(outputs[p].domain);
+	}
+
+	kfree(outputs);
+
+	return -ENOMEM;
 }
 
 static int __init realtek_rtl_of_init(struct device_node *node, struct device_node *parent)
 {
-	struct irq_domain *domain;
-	int ret;
+	int parent_irqs[RTL_ICTL_NUM_OUTPUTS];
+	struct of_phandle_args oirq;
+	unsigned int num_parents;
+	unsigned int soc_irq;
+	unsigned int p;
+	int cpu;
+
+	cpumask_clear(&realtek_ictl_cpu_configurable);
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		realtek_ictl_base[cpu] = of_iomap(node, cpu);
+		if (realtek_ictl_base[cpu]) {
+			cpumask_set_cpu(cpu, &realtek_ictl_cpu_configurable);
+
+			/* Disable all cascaded interrupts and clear routing */
+			writel(0, REG(RTL_ICTL_GIMR, cpu));
+			for (soc_irq = 0; soc_irq < RTL_ICTL_NUM_INPUTS; soc_irq++)
+				write_irr(REG(RTL_ICTL_IRR0, cpu), soc_irq, 0);
+		}
+	}
 
-	realtek_ictl_base = of_iomap(node, 0);
-	if (!realtek_ictl_base)
+	if (cpumask_empty(&realtek_ictl_cpu_configurable))
 		return -ENXIO;
 
-	/* Disable all cascaded interrupts */
-	writel(0, REG(RTL_ICTL_GIMR));
+	num_parents = of_irq_count(node);
+	if (num_parents > RTL_ICTL_NUM_OUTPUTS) {
+		pr_err("too many parent interrupts\n");
+		return -EINVAL;
+	}
 
-	domain = irq_domain_add_simple(node, 32, 0,
-				       &irq_domain_ops, NULL);
+	for (p = 0; p < num_parents; p++)
+		parent_irqs[p] = of_irq_get(node, p);
 
-	ret = map_interrupts(node, domain);
-	if (ret) {
-		pr_err("invalid interrupt map\n");
-		return ret;
+	if (WARN_ON(!num_parents)) {
+		/*
+		 * If DT contains no parent interrupts, assume MIPS CPU IRQ 2
+		 * (HW0) is connected to the first output. This is the case for
+		 * all known hardware anyway. "interrupt-map" is deprecated, so
+		 * don't bother trying to parse that.
+		 * Since this is to account for old devicetrees with one-cell
+		 * interrupt specifiers, only one output domain is needed.
+		 */
+		oirq.np = of_find_compatible_node(NULL, NULL, "mti,cpu-interrupt-controller");
+		if (oirq.np) {
+			oirq.args_count = 1;
+			oirq.args[0] = 2;
+
+			parent_irqs[0] = irq_create_of_mapping(&oirq);
+			num_parents = 1;
+		}
+
+		of_node_put(oirq.np);
 	}
 
-	return 0;
+	/* Ensure we haven't collected any errors before proceeding */
+	for (p = 0; p < num_parents; p++) {
+		if (parent_irqs[p] < 0)
+			return parent_irqs[p];
+		if (!parent_irqs[p])
+			return -ENODEV;
+	}
+
+	return setup_parent_interrupts(node, &parent_irqs[0], num_parents);
 }
 
 IRQCHIP_DECLARE(realtek_rtl_intc, "realtek,rtl-intc", realtek_rtl_of_init);
